In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import PCA
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, cross_val_score, KFold, cross_val_predict, GridSearchCV
from sklearn.utils import resample
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn import preprocessing, decomposition
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_selection import RFE, SelectKBest
from sklearn.preprocessing import StandardScaler
import time
In [2]:
artworks = pd.read_csv('https://media.githubusercontent.com/media/MuseumofModernArt/collection/master/Artworks.csv')
artworks.columns
Out[2]:
In [3]:
# Select Columns.
artworks = artworks[['Artist', 'Nationality', 'Gender', 'Date', 'Department',
'DateAcquired', 'URL', 'ThumbnailURL', 'Height (cm)', 'Width (cm)']]
# Convert URL's to booleans.
artworks['URL'] = artworks['URL'].notnull()
artworks['ThumbnailURL'] = artworks['ThumbnailURL'].notnull()
# Drop films and some other tricky rows.
artworks = artworks[artworks['Department']!='Film']
artworks = artworks[artworks['Department']!='Media and Performance Art']
artworks = artworks[artworks['Department']!='Fluxus Collection']
# Drop missing data.
artworks = artworks.dropna()
In [4]:
# Collapse multiple nationalities, genders, and artists into single labels.
artworks.loc[artworks['Gender'].str.contains(r'\) \('), 'Gender'] = '(multiple_persons)'
artworks.loc[artworks['Nationality'].str.contains(r'\) \('), 'Nationality'] = '(multiple_nationalities)'
artworks.loc[artworks['Artist'].str.contains(','), 'Artist'] = 'Multiple_Artists'
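For context, a minimal sketch (using made-up example strings, not actual rows from the dataset) of how the ') (' pattern flags entries that contain more than one parenthesized value:
In [ ]:
# Hypothetical strings: joint works concatenate several parenthesized values
demo = pd.Series(['(Male)', '(Male) (Female)', '(Female)'])
demo.str.contains(r'\) \(')  # True only for the multi-entry string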
In [5]:
artworks.Nationality.unique()
Out[5]:
In [6]:
artworks1 = artworks.loc[~artworks['Nationality'].isin(
    ['()', '(nationality unknown)', '(Nationality unknown)', '(Nationality Unknown)'])].copy()
artworks1['Nationality'].replace('(multiple_nationalities)', '(Multiple Nationalities)', inplace=True)
In [7]:
artworks1['Nationality'] = artworks1['Nationality'].astype(str).str.replace(r'[()]', '', regex=True)
In [8]:
artworks1.Nationality.unique()
Out[8]:
In [9]:
artworks1 = artworks1.loc[~(artworks1['Gender'] == '()')]
artworks1['Gender'].replace('(male)', '(Male)', inplace=True)
artworks1['Gender'].replace('(multiple_persons)', '(Various Painters)', inplace=True)
artworks1['Gender'] = artworks1['Gender'].astype(str).str.replace(r'[()]', '', regex=True)
In [10]:
artworks1.Gender.unique()
Out[10]:
In [11]:
artworks1['DateAcquired'] = pd.to_datetime(artworks1.DateAcquired)
artworks1['YearAcquired'] = artworks1.DateAcquired.dt.year
artworks1['YearAcquired'].dtype
Out[11]:
In [12]:
# Reduce each date to its first four-digit year, cutting down the number of distinct values.
artworks1['Date'] = artworks1.Date.str.extract('([0-9]{4})', expand=False)
# Final column drops before building the feature matrix.
X = artworks1.drop(['Department', 'DateAcquired', 'Artist', 'Nationality',
                    'Date', 'URL', 'ThumbnailURL'], axis=1)
# Create dummies separately (the artist dummies are not concatenated below).
artists = pd.get_dummies(artworks1.Artist)
nationalities = pd.get_dummies(artworks1.Nationality)
dates = pd.get_dummies(artworks1.Date)
# Concatenate the nationality and date dummies with the other variables.
X = pd.get_dummies(X, sparse=True)
X = pd.concat([X, nationalities, dates], axis=1)
Y = artworks1.Department
In [13]:
# Standardize the features (zero mean, unit variance)
names = X.columns
X_scaled = pd.DataFrame(preprocessing.scale(X), columns=names)
In [14]:
# PCA analysis
# Build the correlation matrix
correlation_matrix = X_scaled.corr()
# Calculate the eigenvalues & eigenvectors of the (symmetric) correlation matrix
eig_vals, eig_vecs = np.linalg.eigh(correlation_matrix)
eig_vals = eig_vals[::-1]  # eigh returns ascending order; flip to descending
# Fit PCA on the standardized features themselves, not on the correlation matrix
sklearn_pca = PCA(n_components=len(X_scaled.columns))
Y_sklearn = sklearn_pca.fit_transform(X_scaled)
# Scree plot for visual analysis of the principal components
plt.title('Scree Plot')
plt.plot(eig_vals)
plt.show()
# For additional aid, print the share of total variance explained by each component
print('The percentage of total variance in the dataset explained:\n',
      sklearn_pca.explained_variance_ratio_)
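As a quick follow-up (a minimal sketch, assuming sklearn_pca has been fit as above), the cumulative explained-variance ratio makes it easier to read off how many components cover a given share of the variance:
In [ ]:
# Cumulative share of variance explained by the first k components
cumulative = np.cumsum(sklearn_pca.explained_variance_ratio_)
plt.title('Cumulative Explained Variance')
plt.plot(cumulative)
plt.axhline(0.9, color='r', linestyle='--')  # illustrative 90% threshold
plt.show()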
In [15]:
# Calculate feature importance using a Random Forest
# Initialize and fit the Random Forest classifier
rf = RandomForestClassifier()
rf.fit(X_scaled, Y)
# Extract the feature importances
feature_importance = rf.feature_importances_
# Make importances relative to max importance
feature_importance = 100.0 * (feature_importance / feature_importance.max())
sorted_idx = np.argsort(feature_importance)
pos = np.arange(sorted_idx.shape[0]) + .5
#Plot the relative importance of each feature
plt.figure(figsize=(7, 20))
plt.subplot(1, 1, 1)
plt.barh(pos, feature_importance[sorted_idx], align='center')
plt.yticks(pos, X.columns[sorted_idx])
plt.xlabel('Relative Importance')
plt.title('Feature Selection (Random Forest)')
plt.show()
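As a compact complement to the bar chart (a minimal sketch reusing the feature_importance array from above), the same importances can be listed numerically:
In [ ]:
# Ten most important features by relative importance
pd.Series(feature_importance, index=X.columns).nlargest(10)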
In [16]:
# Feature selection using SelectKBest
# Scores rank the features, starting with the one with the most explanatory power
# Initialize and fit the model for feature scoring
test = SelectKBest()
fit = test.fit(X_scaled, Y)
# Identify the features with the highest predictive scores
# Create a dataframe with the features ordered by their explanatory power
features_names = X_scaled.columns
Bestfeatures = pd.DataFrame(fit.scores_, index = features_names)
Bestfeatures.columns = ['Best Features']
Bestfeatures.sort_values(by=['Best Features'], ascending=False).head(30)
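Note that SelectKBest() with no arguments scores features with the ANOVA F-statistic (f_classif) and keeps k=10; a sketch of the equivalent explicit call:
In [ ]:
from sklearn.feature_selection import f_classif
# Equivalent to SelectKBest() above, with the defaults spelled out
test_explicit = SelectKBest(score_func=f_classif, k=10)
fit_explicit = test_explicit.fit(X_scaled, Y)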
Out[16]:
In [17]:
# Feature selection with the Recursive Feature Elimination (RFE) model
# Set the number of features to keep, as indicated by the PCA analysis: 6
n_features = 6
# Initialize the model and fit
lr = LogisticRegression()
rfe = RFE(lr, n_features_to_select=n_features)
fit = rfe.fit(X_scaled, Y)
# Summarize the feature selection: show each feature's ranking and
# whether it was selected (True) or left out (False)
result_RFE = pd.DataFrame(list(zip(X_scaled.columns, rfe.ranking_, rfe.support_)),
                          columns=['Features', 'Ranking', 'Support'])
result_RFE.sort_values('Ranking').head(30)
Out[17]:
In [21]:
X_selected = X_scaled[['Height (cm)', 'Width (cm)', 'French','American',
'Gender_Various Painters','Multiple Nationalities',
'YearAcquired','2003', 'Swiss', 'Spanish', '1971', '1926', 'Portuguese',
'1860', 'Dutch', 'Gender_Female', '1940', '1869', '2002',
'1857', '2004', '1843', '1873', '1914', 'Belgian', '1899', '1875']]
In [22]:
# Split the data into training and testing sets: 70/30 train/test
X_train, X_test, y_train, y_test = train_test_split(X_selected, Y, test_size=0.3, random_state=123)
# Initialize the cross-validation generator, n_splits = 5
kf = KFold(5)
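A minimal sketch of how the kf generator defined above can be passed as the cv argument so every model is scored on the same five splits (shown with a plain LogisticRegression as an illustrative baseline, not a model used in the analysis below):
In [ ]:
# Illustrative baseline: score a simple classifier on the same 5 folds
baseline = LogisticRegression()
cross_val_score(baseline, X_train, y_train, cv=kf).mean()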
In [36]:
# Initialize the model.
mlp = MLPClassifier(max_iter=500, tol=0.001)
# Tune hyperparameters
# Create the grid of parameter values to search over
hidden_layer_sizes = [(100, 20), (200, 50), (400, 100)]
activation = ['logistic']
learning_rate_init = [0.001, 0.01, 1]
parameters = {'hidden_layer_sizes': hidden_layer_sizes,
              'activation': activation,
              'learning_rate_init': learning_rate_init}
# Search the grid with 3-fold cross-validation
mlp_tuned = GridSearchCV(mlp, param_grid=parameters, cv=3)
# Fit the tuned classifier on the training data
mlp_tuned.fit(X_train, y_train)
# Print the best parameters
print(mlp_tuned.best_params_)
# Print the training-set score of the tuned model
print(mlp_tuned.score(X_train, y_train))
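Beyond the single best score, GridSearchCV exposes a cv_results_ attribute; a small sketch for inspecting the mean cross-validated score of every parameter combination tried:
In [ ]:
# Mean cross-validated score for each parameter combination
pd.DataFrame(mlp_tuned.cv_results_)[['params', 'mean_test_score']].sort_values(
    'mean_test_score', ascending=False)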
In [37]:
# Predict on the test set with the model fit on the training data
predtest_y = mlp_tuned.predict(X_test)
In [38]:
cross_val_score(mlp_tuned, X_test, y_test, cv=3).mean()
Out[38]:
In [39]:
Y.value_counts()/len(Y)
Out[39]:
In the artwork dataset, a multi-layer perceptron has been run on the selected features. The features were selected using Random Forest, SelectKBest, and RFE.
The final features used are:
'Height (cm)', 'Width (cm)', 'French', 'American', 'Gender_Various Painters', 'Multiple Nationalities', 'YearAcquired', '2003', 'Swiss', 'Spanish', '1971', '1926', 'Portuguese', '1860', 'Dutch', 'Gender_Female', '1940', '1869', '2002', '1857', '2004', '1843', '1873', '1914', 'Belgian', '1899', '1875'
The overall cross-validation accuracy increased from an initial 55% to 67% by using two hidden layers with 400 and 100 neurons. Furthermore, learning_rate_init was set to 0.001 (from 1e-4), and the activation function is logistic. All parameters were chosen with GridSearchCV using 3 folds.
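To go beyond a single accuracy number, the classification_report and confusion_matrix helpers imported at the top (but not used above) could break the test-set performance down by department; a minimal sketch using the held-out predictions predtest_y:
In [ ]:
# Per-class precision, recall and F1 on the held-out test set
print(classification_report(y_test, predtest_y))
print(confusion_matrix(y_test, predtest_y))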